home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Technotools
/
Technotools (Chestnut CD-ROM)(1993).ISO
/
lang_c
/
bmg
/
bmg.c
next >
Wrap
Text File
|
1986-08-29
|
4KB
|
162 lines
/*
* bmg.c -> Boyer-Moore-Gosper search routines
*
* Adapted from:
* Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
* original paper (CACM, October, 1977). No delta1 or delta2. According to
* experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
* practical value. However, to improve for worst case input, integrating
* the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
* February 1986) deserves consideration.
*
* James A. Woods Copyright (c) 1986
* NASA Ames Research Center
*
* 29 April 1986 Jeff Mogul Stanford
* Adapted as a set of subroutines for use by a program. No
* regular-expression support.
*
* 29 August 1986 Frank Whaley Beyond Words
* Trimmed for speed and other dirty tricks.
*/
#include <ctype.h>
#include <string.h>
#include "bmg.h"
/* forward declarations */
static int strncmpi(unsigned char *, unsigned char *, int);
static void lower(char *);
/*
* bmgCompile() -> compile Boyer-Moore delta table
*
* bmgCompile() compiles the delta table for the given search string, and
* initializes the search argument structure. This structure must be
* passed to the bmgSearch() function described below.
*/
void bmgCompile(pat, arg, ignore)
char *pat; /* the pattern */
bmgARG *arg; /* argument data */
int ignore; /* TRUE to ignore case */
{
int i, /* general ctr */
patlen; /* pattern length */
patlen = strlen(pat);
#ifdef CHECKS
if (patlen > bmgMAXPAT)
{
fprintf(stderr, "bmgCompile: pattern length exceeds %d\n",
bmgMAXPAT);
pat[bmgMAXPAT] = 0; /* truncate */
}
#endif
strcpy(arg->pat, pat);
if (arg->ignore = ignore)
lower(arg->pat);
memset(arg->delta, patlen, 256);
for (i = 0; i < patlen; ++i)
arg->delta[((unsigned char *)pat)[i]] = patlen - i - 1;
if (ignore) /* tweak upper case if ignore on */
for (i = 0; i < patlen; ++i)
arg->delta[toupper(((unsigned char *)pat)[i])] = patlen - i - 1;
}
/*
* bmgSearch() -> scan for match
*
* bmgSearch() performs a Boyer-Moore-Gosper search of the given buffer
* for the string described by the given search argument structure. The
* match action function "action" will be called for each match found.
* This function should return non-zero to continue searching, or 0 to
* terminate the search. bmgSearch() returns the total number of
* matches found.
*/
int bmgSearch(buffer, buflen, arg, action)
char *buffer; /* buffer to search */
int buflen; /* length of buffer */
bmgARG *arg; /* search argument */
int (*action)(); /* action function */
{
char *s; /* temp ptr for comparisons */
int inc, /* position increment */
k, /* current buffer index */
nhits, /* match ctr */
patlen; /* pattern length */
nhits = 0;
k = (patlen = strlen(arg->pat)) - 1;
for (;;)
{
/*
* the following (unsigned char *) type casts save a
* few clocks by freeing us from some XCHGs
*/
while ((inc = arg->delta[((unsigned char *)buffer)[k]]) &&
((k += inc) < buflen))
;
if (k >= buflen)
break;
s = buffer + (k++ - (patlen - 1));
if (!(arg->ignore ? strncmpi(s, arg->pat, patlen) : strncmp(s, arg->pat, patlen)))
{
++nhits;
if (!(*action)(buffer, s - buffer))
return (nhits);
}
}
return (nhits);
}
/*****************************************************************************/
/***************************** local functions *****************************/
/*****************************************************************************/
/*
* lower() -> lower case a string
*/
static void lower(s)
char *s;
{
while (*s)
{
*s = tolower(*s);
++s;
}
}
/*
* strncmpi() -> strncmp(), ignore case
*/
static int strncmpi(s, t, n)
unsigned char *s,
*t;
int n;
{
for (; n-- && (tolower(*s) == tolower(*t)); ++t)
if (!*s++)
return (0); /* equal */
if (n < 0) /* maximum hit */
return (0); /* equal */
return ((tolower(*s) > tolower(*t)) ? 1 : (-1)); /* not equal */
}
/*
* END of bmg.c
*/